{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from pandas import Series, DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>LOCATION</th>\n",
       "      <th>SUBJECT</th>\n",
       "      <th>TIME</th>\n",
       "      <th>Value</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AUS</td>\n",
       "      <td>INT_REC</td>\n",
       "      <td>2008</td>\n",
       "      <td>31159.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AUS</td>\n",
       "      <td>INT_REC</td>\n",
       "      <td>2009</td>\n",
       "      <td>29980.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>AUS</td>\n",
       "      <td>INT_REC</td>\n",
       "      <td>2010</td>\n",
       "      <td>35165.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>AUS</td>\n",
       "      <td>INT_REC</td>\n",
       "      <td>2011</td>\n",
       "      <td>38710.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AUS</td>\n",
       "      <td>INT_REC</td>\n",
       "      <td>2012</td>\n",
       "      <td>38003.7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  LOCATION  SUBJECT  TIME    Value\n",
       "0      AUS  INT_REC  2008  31159.8\n",
       "1      AUS  INT_REC  2009  29980.7\n",
       "2      AUS  INT_REC  2010  35165.5\n",
       "3      AUS  INT_REC  2011  38710.1\n",
       "4      AUS  INT_REC  2012  38003.7"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the OECD tourism data (from `oecd_tourism.oecd`) into a data frame.\n",
    "\n",
    "tourism_filename = '../data/oecd_tourism.csv'\n",
    "tourism_df = pd.read_csv(tourism_filename, \n",
    "                        usecols=['LOCATION', 'SUBJECT', 'TIME', 'Value'])\n",
    "\n",
    "tourism_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LOCATION\n",
       "USA    201613.500000\n",
       "ESP     69655.817364\n",
       "FRA     65063.335727\n",
       "DEU     53408.570636\n",
       "GBR     51752.090909\n",
       "Name: Value, dtype: float64"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Find the five countries that received the greatest amount of tourist dollars,\n",
    "# on average, in the data set.\n",
    "(\n",
    "    tourism_df\n",
    "    .loc[tourism_df['SUBJECT'] == 'INT_REC']\n",
    "    .groupby('LOCATION')['Value']\n",
    "    .mean()\n",
    "    .sort_values(ascending=False)\n",
    "    .head()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LOCATION\n",
       "MLT     387.801667\n",
       "CRI     867.075000\n",
       "LVA     919.545455\n",
       "ISL    1072.819636\n",
       "HRV    1115.628083\n",
       "Name: Value, dtype: float64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Find the five countries whose citizens spent the least amount of tourist dollars, \n",
    "# on average, in the data set.\n",
    "(\n",
    "    tourism_df\n",
    "    .loc[tourism_df['SUBJECT'] == 'INT-EXP']\n",
    "    .groupby('LOCATION')['Value']\n",
    "    .mean()\n",
    "    .sort_values()\n",
    "    .head()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NAME</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>LOCATION</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AUS</th>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AUT</th>\n",
       "      <td>Austria</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>BEL</th>\n",
       "      <td>Belgium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CAN</th>\n",
       "      <td>Canada</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DNK</th>\n",
       "      <td>Denmark</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               NAME\n",
       "LOCATION           \n",
       "AUS       Australia\n",
       "AUT         Austria\n",
       "BEL         Belgium\n",
       "CAN          Canada\n",
       "DNK         Denmark"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# I've created a separate CSV file, `oecd_locations.csv`, with two columns. \n",
    "# One contains the three-letter abbreviated location name you saw in the first CSV \n",
    "# file. The second is the full country name. Load this into a data frame, \n",
    "# using the abbreviated data as an index.\n",
    "\n",
    "locations_filename = '../data/oecd_locations.csv'\n",
    "locations_df = pd.read_csv(locations_filename,\n",
    "                          header=None,\n",
    "                           names=['LOCATION', 'NAME'],\n",
    "                          index_col='LOCATION')\n",
    "\n",
    "locations_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NAME</th>\n",
       "      <th>SUBJECT</th>\n",
       "      <th>TIME</th>\n",
       "      <th>Value</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>LOCATION</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AUS</th>\n",
       "      <td>Australia</td>\n",
       "      <td>INT_REC</td>\n",
       "      <td>2008</td>\n",
       "      <td>31159.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AUS</th>\n",
       "      <td>Australia</td>\n",
       "      <td>INT_REC</td>\n",
       "      <td>2009</td>\n",
       "      <td>29980.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AUS</th>\n",
       "      <td>Australia</td>\n",
       "      <td>INT_REC</td>\n",
       "      <td>2010</td>\n",
       "      <td>35165.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AUS</th>\n",
       "      <td>Australia</td>\n",
       "      <td>INT_REC</td>\n",
       "      <td>2011</td>\n",
       "      <td>38710.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AUS</th>\n",
       "      <td>Australia</td>\n",
       "      <td>INT_REC</td>\n",
       "      <td>2012</td>\n",
       "      <td>38003.7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               NAME  SUBJECT  TIME    Value\n",
       "LOCATION                                   \n",
       "AUS       Australia  INT_REC  2008  31159.8\n",
       "AUS       Australia  INT_REC  2009  29980.7\n",
       "AUS       Australia  INT_REC  2010  35165.5\n",
       "AUS       Australia  INT_REC  2011  38710.1\n",
       "AUS       Australia  INT_REC  2012  38003.7"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Join these two data frames together into a new one, In the new data frame,\n",
    "# there will be no `LOCATION` column. Instead, there will be a `NAME` column, \n",
    "# with the full name of the country.\n",
    "\n",
    "fullname_df = locations_df.join(tourism_df.set_index('LOCATION'))\n",
    "fullname_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "NAME\n",
       "United States     201613.500000\n",
       "France             65063.335727\n",
       "Germany            53408.570636\n",
       "United Kingdom     51752.090909\n",
       "Italy              44930.211545\n",
       "Name: Value, dtype: float64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(\n",
    "    fullname_df\n",
    "    .loc[fullname_df['SUBJECT'] == 'INT_REC']\n",
    "    .groupby('NAME')['Value']\n",
    "    .mean()\n",
    "    .sort_values(ascending=False)\n",
    "    .head()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "NAME\n",
       "Finland    4700.236273\n",
       "Brazil     6321.476083\n",
       "Israel     6542.383250\n",
       "Hungary    7299.353000\n",
       "Denmark    9398.957636\n",
       "Name: Value, dtype: float64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(\n",
    "    fullname_df\n",
    "    .loc[fullname_df['SUBJECT'] == 'INT_REC']\n",
    "    .groupby('NAME')['Value']\n",
    "    .mean()\n",
    "    .sort_values()\n",
    "    .head()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(16, 1)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "locations_df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
